Reference¶
The dataset has been taken from Graham, Daniel B. and Allinson, Nigel (1998), Face Database: Dataset
The following GitHub repository has been used for reference: Repository
Read Me¶
Two zip files (test.zip and cropped.zip) need to be added before running the snippets below. The zip files need to be placed in the content directory.
Assumptions¶
There are 20 different faces in total. Each face has 15-25 images taken at different angles and under different lighting.
Some of these images have been removed from the dataset and are used as test images, so there are 20 test images and 546 dataset images in total.
To find the accuracy, each test image is compared with all 546 images of the dataset and an error is computed. A threshold of 4000 is used for the error: if the error is less than 4000 the image counts as detected, otherwise it does not. All 20 faces are labelled 1a to 1t.
Accuracy for a face is defined as (TP+TN)/(TP+TN+FP+FN)
# Run this cell to unpack the uploaded dataset archives
import zipfile

# Both archives are handled identically: open read-only and extract everything
# with the default destination.
for archive_name in ("test.zip", "cropped.zip"):
    with zipfile.ZipFile(archive_name, "r") as zip_ref:
        zip_ref.extractall()
# importing utilities
import os, glob
from sklearn import preprocessing
import cv2
import numpy as np
import matplotlib.pyplot as plt
import math
# Locations of the dataset images and the test images, both resolved
# relative to the current working directory.
working_dir = os.getcwd()
dataset_path = working_dir + '/cropped/'
test_path = working_dir + '/test/'
# function to plot the images
def plot_portraits(images, titles, h, w, n_row, n_col):
    """Plot a grid of grayscale portraits with a title above each one.

    Parameters
    ----------
    images : sequence of array-like
        Images to draw; each entry is reshaped to ``(h, w)`` before display.
    titles : sequence
        Title for each image, indexed in step with ``images``.
    h, w : int
        Height and width each image is reshaped to.
    n_row, n_col : int
        Number of rows and columns of the subplot grid.

    At most ``n_row * n_col`` images are drawn. When fewer images or titles
    are supplied than there are grid cells, the remaining cells are left
    empty instead of raising ``IndexError``.
    """
    plt.figure(figsize=(2.2 * n_col, 2.2 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.20)
    # Never index past the end of either sequence.
    n_plots = min(n_row * n_col, len(images), len(titles))
    for i in range(n_plots):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i])
        # Empty tick tuples hide the axis ticks on every portrait.
        plt.xticks(())
        plt.yticks(())
# count the dataset images: every path found recursively under dataset_path
# whose name ends in 'pgm'
shape = None
pgm_paths = (
    path
    for path in glob.glob(dataset_path + '/**', recursive=True)
    if path.endswith('pgm')
)
total_images = sum(1 for _ in pgm_paths)
print(total_images)
546
shape = (112, 92)  # (height, width) every image is resized to
# Pre-allocate one (height, width) slot per counted image.
all_images = np.zeros((total_images, shape[0], shape[1]), dtype='float64')
names = list()  # class label of each image, in the same order as all_images
i = 0
# glob returns paths in arbitrary order; sorting keeps the image order (and
# therefore the row order of every matrix derived from it) reproducible.
for folder in sorted(glob.glob(dataset_path + '/*' + '/face')):  # iterate through all the classes
    # The class label is the name of the directory that contains 'face'.
    # Using the directory name instead of a fixed-width slice of the path
    # keeps this correct for labels of any length.
    label = os.path.basename(os.path.dirname(folder))
    for image in sorted(glob.glob(folder + '/*')):  # iterate through each folder (class)
        read_image = cv2.imread(image, cv2.IMREAD_GRAYSCALE)  # read the image in grayscale
        if read_image is None:
            # cv2.imread does not raise on unreadable files; fail loudly here
            # rather than with an obscure error inside cv2.resize.
            raise ValueError("could not read image file: " + image)
        if i >= total_images:
            raise ValueError(
                "found more image files under '*/face' than the "
                + str(total_images) + " images counted"
            )
        # cv2.resize takes the target size as (width, height)
        resized_image = cv2.resize(read_image, (shape[1], shape[0]))
        all_images[i] = np.array(resized_image)
        names.append(label)  # list for the different faces
        i += 1
if i != total_images:
    # Unfilled rows would otherwise stay all-zero and silently skew the mean
    # image and the eigenfaces.
    raise ValueError(
        "loaded " + str(i) + " images but " + str(total_images) + " were counted"
    )
plot_portraits(all_images, names, shape[0], shape[1], 21, 26)  # plotting all 546 images with names
546
# Convert the images into vectors: each row of A is one flattened image,
# i.e. A is a (total_images x image_vector_size) matrix.
# reshape is used instead of np.resize because np.resize silently repeats or
# truncates data when the element counts differ, whereas reshape raises.
# NOTE: A is a view onto all_images here, not a copy.
A = all_images.reshape(total_images, shape[0] * shape[1])
mean_vector = np.mean(A, axis=0, dtype='float64')  # calculate the mean image vector
# One copy of the mean vector per image: (total_images x image_vector_size).
mean_matrix = np.tile(mean_vector, (total_images, 1))
A_tilde = A - mean_matrix  # mean-subtracted image vectors
plt.imshow(mean_vector.reshape(shape[0], shape[1]), cmap='gray')  # show the mean image
plt.title('Mean Image')
plt.show()
plot_portraits(A_tilde, names, 112, 92, 21, 26)  # each row of A_tilde is one mean-subtracted image
# Small (total_images x total_images) surrogate of the covariance matrix.
# Since each row is an image vector (unlike in the notes),
# L = (A_tilde)(A_tilde.T) instead of L = (A_tilde.T)(A_tilde).
L = (A_tilde.dot(A_tilde.T)) / total_images
print("L shape : ", L.shape)
# L is real and symmetric, so use eigh rather than the general-purpose eig:
# eigh guarantees real eigenvalues and orthonormal eigenvectors, whereas eig
# can return complex values caused purely by round-off.
eigenvalues, eigenvectors = np.linalg.eigh(L)
# eigh returns eigenvalues in ascending order; reorder to descending so the
# most significant components come first.
idx = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]  # columns sorted to match eigenvalues
L shape : (546, 546)
# Map the eigenvectors of L back into image space: every column of the result
# is a linear combination of the columns of A_tilde.T (the mean-subtracted
# image vectors).
eigenvectors_C = A_tilde.T.dot(eigenvectors)
# Each column is an eigenvector of C where C = (A_tilde.T)(A_tilde).
# NOTE: in the notes, C = (A_tilde)(A_tilde.T)
eigenvectors_C.shape
(10304, 546)
# Scale every eigenvector to unit L2 length.
# normalize expects an (n_samples, n_features) matrix and works row by row,
# so the eigenvectors are transposed from columns into rows first.
eigenvectors_as_rows = eigenvectors_C.T
eigenfaces = preprocessing.normalize(eigenvectors_as_rows, norm='l2', axis=1)
eigenfaces.shape
(546, 10304)
# visualize the eigenfaces, titling each one with its row index
eigenface_count = eigenfaces.shape[0]
eigenface_labels = list(range(eigenface_count))  # 0 .. number of eigenfaces - 1
plot_portraits(eigenfaces, eigenface_labels, 112, 92, 21, 26)